\documentclass[11pt, fleqn]{article}

\usepackage{amsfonts, amssymb, amsmath, amsthm}
\usepackage[longnamesfirst]{natbib}
\usepackage[margin=1in]{geometry}
\usepackage{graphicx}
\usepackage{hyperref}

%%% DEFINE INPUT FOLDERS %%%
\makeatletter
\def\input@path{{../}}
\makeatother
\graphicspath{{../}}

\begin{document}

\begin{titlepage}

\title{Documentation for \\ ``Media Sentiment and International Asset Prices''\footnotetext{Fraiberger: World Bank \& NYU, \href{mailto:sfraiberger@worldbank.org}{sfraiberger@worldbank.org}; Lee: NYU, \href{mailto:dql204@nyu.edu}{dql204@nyu.edu}; Puy: IMF, \href{mailto:Dpuy@imf.org}{Dpuy@imf.org}; Ranciere: USC, NBER \& CEPR, \href{mailto:Ranciere@usc.edu}{Ranciere@usc.edu}. The views expressed in this paper are those of the author(s) and do not necessarily represent the views of the IMF, the World Bank, their Executive Boards or their management.}\\}

\author{
Samuel P. Fraiberger \\ 
\and 
Do Lee \\ 
\and 
Damien Puy \\ 
\and 
Romain Ranciere \\
}

\date{\today}
\maketitle\let\endtitlepage\relax

\end{titlepage}

\section{Contents}

This folder contains:
\begin{enumerate}
\item News sentiment indicators used in the paper: \texttt{./data/}
\item The main results from Figures 1 through 3 of the paper: \texttt{./results/}
\item Programs necessary to produce them: \texttt{./code/}
\end{enumerate}

\section{News articles and sentiment measures}

The news sentiment indicators are in \texttt{./indicator/news\_sentiment.csv}. 
\begin{itemize}
\item The data file is a daily panel of 25 countries from 1991 to 2019.
\end{itemize}
In the \texttt{.csv} file, we provide three types of news sentiment indicators:
\begin{enumerate}
\item \texttt{sentiment\_country}: Country-specific news sentiment, available from 1991 to 2019
\begin{itemize}
\item The variable was produced by \texttt{./code/1\_sentiment\_country.do}
\end{itemize}
\item \texttt{sentiment\_local}: Local news sentiment, available from 1991 to 2015
\begin{itemize}
\item The variable was produced by \texttt{./code/2\_sentiment\_local.do}
\end{itemize}
\item \texttt{sentiment\_global}: Global news sentiment, available from 1991 to 2019
\begin{itemize}
\item The variable was produced by \texttt{./code/3\_sentiment\_global.do}
\end{itemize}
\end{enumerate}

%\texttt{./code/4\_merge.do} merges the news sentiment indicators above into a single \texttt{.csv} file.
%
%\texttt{./code/0\_main.do} is a wrapper \texttt{.do} file that executes all the \texttt{.do} files above.

\subsection{Country-specific news sentiment indicator}

\begin{itemize}
\item Our dataset of news articles comes from Factiva.com.
\item Each article is annotated with topics and geographic tags generated by Factiva using a proprietary algorithm.
\item In the main paper, we focused on news articles that were:
\begin{itemize}
\item English articles published by Reuters between 1991 and 2015
\item Tagged with either economic news or financial market news
\item Tagged with one of the 25 countries in our sample (9 advanced economies (AE) and 16 emerging markets (EM)).
\end{itemize}
\item In this folder, we provide an updated version of the news sentiment index based on Reuters articles published between 1991 and 2019.
\item To measure news sentiment, we use a bag-of-words model, which allows us to reduce complex and multi-dimensional text data into a single number.
\begin{itemize}
\item First, we combine existing lists of positive and negative words found in financial texts by \cite{loughran_mcdonald_2011} and in texts related to economic policy by \cite{young_soroka_2012}.
\item We then expand our lists by including the inflections of each word: for example, the word \emph{lose} belongs to the negative list, hence we also include the words \emph{losing}, \emph{loser}, \emph{lost}, \emph{loss}, etc., leading to a final list of 7,217 negative words and 3,250 positive words.
\end{itemize}
\item We define the sentiment of an article as the difference between the share of positive words and the share of negative words.
\begin{itemize}
\item In this repository, we provide our baseline estimates where we allow each word to contribute to the sentiment measure proportionally to its frequency of occurrence.
\end{itemize}
\item Finally, we compute a daily sentiment index for each country by taking the average sentiment across articles that are tagged with the country's name.
\item The resulting sentiment index is expressed as a z-score, i.e., in units of standard deviations away from the average within each country.
\item The news article-level data sourced from Factiva.com is proprietary. Researchers interested in reproducing the analysis will need to obtain a subscription for these data sets.
\end{itemize}

\subsection{Local news sentiment indicator}

\begin{itemize}
\item Local news sentiment isolates local news from country-specific news articles.
\item Specifically, we recompute the sentiment index of each country after excluding any article mentioning any other country.
\item Applying this filter allows us to capture the sentiment of purely local news.
\end{itemize}

\subsection{Global news sentiment indicator}

\begin{itemize}
\item The global news sentiment index captures the tone of news published in the world every day.
\item We extract a common factor from our country-specific sentiment series using a Kalman filter.
\item Formally, we estimate a single (latent) factor model in the spirit of \cite{STOCK2016415}.
\begin{itemize}
\item We use an $\mathrm{AR}(8)$ for the factor structure and estimate the model using maximum likelihood.
\end{itemize}
\item The data coverage for news articles does not start at the same time for all countries.
\begin{itemize}
\item In order to ensure that all countries in our sample have the longest time coverage, we impute these missing observations with zeroes when we estimate the factor model to extract the global news sentiment.
\end{itemize}
\end{itemize}

\section{Documentation for other variables}

The data for producing the main results are provided in a Stata \texttt{.dta} file named \texttt{./data/regression\_sample.dta}.
We put together a country-level daily panel from the following sources:

\begin{enumerate}
\item \textit{Daily equity returns} are based on each country's main stock market index. 
\item \textit{Daily equity trading volumes} are collected from reports by local stock exchanges. 
\begin{itemize}
\item Following \citet{campbell_grossman_wang_1993} and \citet{tetlock_2007}, we compute the de-trended daily log trading volume where the trend is the 60-day rolling average. 
\end{itemize}
\item \textit{Stock market volatility} is computed by
\begin{enumerate}
\item[(i)] de-meaning each daily stock return, 
\item[(ii)] taking the square of this residual, and 
\item[(iii)] subtracting the past 60-day moving average of the squared residuals.
\end{enumerate}
\item \textit{VIX} proxies for global volatility. % Source: Chicago Board Options Exchange. 
\item \textit{Dow Jones World Index} measures world equity returns. 
\item \textit{S\&P Goldman Sachs Commodity Index} measures daily returns in commodity prices. 
\end{enumerate}

The data sourced from \textit{Dow Jones World Index} and \textit{S\&P Goldman Sachs Commodity Index} are proprietary. Researchers interested in reproducing the analysis will need to obtain a subscription for these data sets. 


\bibliographystyle{aer}
\bibliography{./references}

\end{document}